# steps 4,5, 6 use euclidean distance
library(plotly)
library(seriation)

Question 1.

# keep columns 1,2,5,6,7,9,10,16,17,18,19
p_e <- prices_earnings[, c(1,2,5,6,7,9,10,16,17,18,19)]
rownames(p_e) <- p_e[[1]]

Question 2.

Without doing any reordering We cannot identify any clusters or outliers.

#p_e_sc %>% 
  plot_ly(x =~colnames(p_e_sc), y =~rownames(p_e_sc),
    z = ~p_e_sc, type = "heatmap", 
    colors = colorRamp(c("black","red"))
  ) %>%
  layout(title =  "Heatmap of prices and earnings",
         xaxis = list(title = "Price-Earnings Indicators- No order", zeroline = FALSE),
         yaxis = list(title = "Cities", zeroline = FALSE)
  )

Question 3.

# seriation needs to permute rows and columns, thus distance by row and column
p_e_rdist <- dist(p_e_sc, method = "euclidean")
p_e_cdist <- dist(t(p_e_sc), method = "euclidean")
# make sure that results are reproducible
set.seed(1011)
# get orders of the row and col distances; Hamilton path length
order1 <- get_order(seriate(p_e_rdist, method = "HC"))
order2 <-get_order(seriate(p_e_cdist, method = "HC"))
p_reord <- p_e_sc[rev(order1), order2]
plot_ly(x =~colnames(p_reord), y =~rownames(p_reord),
    z = ~p_reord, type = "heatmap", 
    colors = colorRamp(c("black","red"))
  ) %>%
  layout(title =  "Heatmap of prices and earnings (Euclid dist)",
         xaxis = list(title = "Price-Earnings Indicators", zeroline = FALSE),
         yaxis = list(title = "Cities", zeroline = FALSE)
  )
# computing distance as one minus correlation
p_e_cor <- as.dist((1 - cor(p_e_sc))/2)
# as distance
#p_e_cor_dist <- as.dist(p_e_cor)
p_e_cor1 <- as.dist((1 - cor(t(p_e_sc)))/2)
plot_ly(x =~colnames(p_reord2), y =~rownames(p_reord2),
    z = ~p_reord2, type = "heatmap", 
    colors = colorRamp(c("black","red"))
  ) %>%
  layout(title =  "Heatmap of prices and earnings (Cor dist)",
         xaxis = list(title = "Price-Earnings Indicators", zeroline = FALSE),
         yaxis = list(title = "Cities", zeroline = FALSE)
  )

The ordering by euclidean distance produces a heat map that is easier to analyze. At first glance we can perceive four general regions of two groups. The first group heat map color tends towards a brighter shade of red while the second group tend towards a darker shade of red/black. Although these groups can be seen in the correlation distance heat map, it is not as clear as the first.

Based on the euclidean distance heat map, net wage tends to higher values from Dubai while the number of hours worked decrease. This is the opposite to cities like Delhi,Bankok and Seoul. Interestingly food costs are generally low in the cities with highee working hours. Caracas is an outlier because food costs are high while net wage and the number of hours worked remains low.

Question 4.

# use p_e_rdist and p_e_cdist (euclidean distance)
ord_q4_1 <- get_order(seriate(p_e_rdist, method = "TSP"))
order_q4_2 <-get_order(seriate(p_e_cdist, method = "TSP"))
p_reord_q4 <- p_e_sc[rev(ord_q4_1), order_q4_2]
plot_ly(x =~colnames(p_reord_q4), y =~rownames(p_reord_q4),
    z = ~p_reord_q4, type = "heatmap", 
    colors = colorRamp(c("black","red"))
  ) %>%
  layout(title =  "Heatmap of prices and earnings (Euclid dist- TSP)",
         xaxis = list(title = "Price-Earnings Indicators", zeroline = FALSE),
         yaxis = list(title = "Cities", zeroline = FALSE)
  )
# function creterion to compare unordered distance and ordered
# distance = p_e_rdist (row distance)
# or = order
or1 <- seriate(p_e_rdist, method = "HC")
or2 <- seriate(p_e_rdist, method = "TSP")
result1 <- rbind(unordered = criterion(p_e_rdist), ordered = criterion(p_e_rdist,or1 ))
result2 <- rbind(unordered = criterion(p_e_rdist), ordered = criterion(p_e_rdist,or2 ))
result1
               2SUM AR_deviations AR_events      BAR      Cor_R Gradient_raw Gradient_weighted  Inertia
unordered 1012004.5     107139.67     61656 29259.38 0.04268063        -4032         -11081.08 17886336
ordered    775013.8      25528.34     30424 20066.38 0.18102048        58432         143609.49 24139500
          Lazy_path_length Least_squares        LS       ME Moore_stress Neumann_stress Path_length      RGAR
unordered        10126.431       3575435 1006126.8 568.2673     986.9925       553.9889    281.7269 0.5169014
ordered           4443.523       3369181  902999.7 642.9155     477.8214       274.1051    144.4925 0.2550637
result2
               2SUM AR_deviations AR_events      BAR      Cor_R Gradient_raw Gradient_weighted  Inertia
unordered 1012004.5     107139.67     61656 29259.38 0.04268063        -4032         -11081.08 17886336
ordered    881105.6      56623.15     43377 19643.51 0.08453357        32526          79115.95 20501659
          Lazy_path_length Least_squares        LS       ME Moore_stress Neumann_stress Path_length      RGAR
unordered        10126.431       3575435 1006126.8 568.2673     986.9925       553.9889    281.7269 0.5169014
ordered           4210.798       3455172  945995.4 651.8822     427.3238       243.2550    119.7929 0.3636569

HC solver has shorter path lenght compared to TSP solver

LS0tDQp0aXRsZTogIlZpc3VhbGl6YXRpb24gTGFiIDMiDQphdXRob3I6ICJSb3NobmkgU3VuZGFyYW11cnRoeSAocm9zc3U4MDkpICYgQnJpYW4gTWFzaW5kZSAoYnJpbWE3NDgpIg0KZGF0ZTogIjI2IFNlcHRlbWJlciAyMDE4Ig0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIGRmX3ByaW50OiBwYWdlZA0KICBodG1sX25vdGVib29rOg0KICAgIHRoZW1lOiBqb3VybmFsDQogIHBkZl9kb2N1bWVudDogZGVmYXVsdA0KZm9udHNpemU6IDExcHQNCmJpYmxpb2dyYXBoeTogcmVmZXJlbmNlcy5iaWINCi0tLQ0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCBwYWdlZC5wcmludD1GQUxTRX0NCiMgc3RlcHMgNCw1LCA2IHVzZSBldWNsaWRlYW4gZGlzdGFuY2UNCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShzZXJpYXRpb24pDQpgYGANCg0KYGBge3IgZGF0YSwgZWNobyA9IEZBTFNFfQ0KcHJpY2VzX2Vhcm5pbmdzIDwtIHJlYWQuZGVsaW0oInByaWNlcy1hbmQtZWFybmluZ3MudHh0IikNCmBgYA0KIyMjIFF1ZXN0aW9uIDEuDQpgYGB7cn0NCiMga2VlcCBjb2x1bW5zIDEsMiw1LDYsNyw5LDEwLDE2LDE3LDE4LDE5DQoNCnBfZSA8LSBwcmljZXNfZWFybmluZ3NbLCBjKDEsMiw1LDYsNyw5LDEwLDE2LDE3LDE4LDE5KV0NCg0Kcm93bmFtZXMocF9lKSA8LSBwX2VbWzFdXQ0KYGBgDQoNCmBgYHtyIHNjYWxlLCBlY2hvID0gRkFMU0V9DQojIHF1ZXN0aW9uIDIgc2NhbGluZw0KcF9lX3NjIDwtIHNjYWxlKHBfZVssLTFdKQ0KDQpgYGANCg0KIyMjIFF1ZXN0aW9uIDIuDQpXaXRob3V0IGRvaW5nIGFueSByZW9yZGVyaW5nIFdlIGNhbm5vdCBpZGVudGlmeSBhbnkgY2x1c3RlcnMgb3Igb3V0bGllcnMuDQoNCmBgYHtyIGhlYXRtYXB9DQojcF9lX3NjICU+JSANCiAgcGxvdF9seSh4ID1+Y29sbmFtZXMocF9lX3NjKSwgeSA9fnJvd25hbWVzKHBfZV9zYyksDQogICAgeiA9IH5wX2Vfc2MsIHR5cGUgPSAiaGVhdG1hcCIsIA0KICAgIGNvbG9ycyA9IGNvbG9yUmFtcChjKCJibGFjayIsInJlZCIpKQ0KICApICU+JQ0KICBsYXlvdXQodGl0bGUgPSAgIkhlYXRtYXAgb2YgcHJpY2VzIGFuZCBlYXJuaW5ncyIsDQogICAgICAgICB4YXhpcyA9IGxpc3QodGl0bGUgPSAiUHJpY2UtRWFybmluZ3MgSW5kaWNhdG9ycyIsIHplcm9saW5lID0gRkFMU0UpLA0KICAgICAgICAgeWF4aXMgPSBsaXN0KHRpdGxlID0gIkNpdGllcyIsIHplcm9saW5lID0gRkFMU0UpDQogICkNCmBgYA0KDQojIyMgUXVlc3Rpb24gMy4NCg0KYGBge3IgcXVlc3Rpb24zX2FfZGlzdH0NCiMgc2VyaWF0aW9uIG5lZWRzIHRvIHBlcm11dGUgcm93cyBhbmQgY29sdW1ucywgdGh1cyBkaXN0YW5jZSBieSByb3cgYW5kIGNvbHVtbg0KcF9lX3JkaXN0IDwtIGRpc3QocF9lX3NjLCBtZXRob2QgPSAiZXVjbGlkZWFuIikNCg0KcF9lX2NkaXN0IDwtIGRpc3QodChwX2Vfc2MpLCBtZXRob2QgPSAiZXVjbGlkZWFuIikNCmBgYA0KDQpgYGB7ciBxdWVzdGlvbjNfYV9vcmRlcn0NCiMgbWFrZSBzdXJlIHRoYXQgcmVzdWx0cyBhcmUgcmVwcm9kdWNpYmxlDQpzZXQuc2VlZCgxMDExKQ0KIyBnZXQgb3JkZXJzIG9mIHRoZSByb3cgYW5kIGNvbCBkaXN0YW5jZXM7IEhhbWlsdG9uIHBhdGggbGVuZ3RoDQpvcmRlcjEgPC0gZ2V0X29yZGVyKHNlcmlhdGUocF9lX3JkaXN0LCBtZXRob2QgPSAiSEMiKSkNCg0Kb3JkZXIyIDwtZ2V0X29yZGVyKHNlcmlhdGUocF9lX2NkaXN0LCBtZXRob2QgPSAiSEMiKSkNCg0KcF9yZW9yZCA8LSBwX2Vfc2NbcmV2KG9yZGVyMSksIG9yZGVyMl0NCmBgYA0KDQpgYGB7ciBxdWVzdGlvbjNfYV9oZWF0fQ0KcGxvdF9seSh4ID1+Y29sbmFtZXMocF9yZW9yZCksIHkgPX5yb3duYW1lcyhwX3Jlb3JkKSwNCiAgICB6ID0gfnBfcmVvcmQsIHR5cGUgPSAiaGVhdG1hcCIsIA0KICAgIGNvbG9ycyA9IGNvbG9yUmFtcChjKCJibGFjayIsInJlZCIpKQ0KICApICU+JQ0KICBsYXlvdXQodGl0bGUgPSAgIkhlYXRtYXAgb2YgcHJpY2VzIGFuZCBlYXJuaW5ncyAoRXVjbGlkIGRpc3QgLSBIQykiLA0KICAgICAgICAgeGF4aXMgPSBsaXN0KHRpdGxlID0gIlByaWNlLUVhcm5pbmdzIEluZGljYXRvcnMiLCB6ZXJvbGluZSA9IEZBTFNFKSwNCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJDaXRpZXMiLCB6ZXJvbGluZSA9IEZBTFNFKQ0KICApDQpgYGANCg0KDQpgYGB7ciBxdWVzdGlvbjNfYl9kaXN0fQ0KIyBjb21wdXRpbmcgZGlzdGFuY2UgYXMgb25lIG1pbnVzIGNvcnJlbGF0aW9uDQoNCnBfZV9jb3IgPC0gYXMuZGlzdCgoMSAtIGNvcihwX2Vfc2MpKS8yKQ0KDQpwX2VfY29yMSA8LSBhcy5kaXN0KCgxIC0gY29yKHQocF9lX3NjKSkpLzIpDQpgYGANCg0KYGBge3IgcXVlc3Rpb24zX2Jfb3JkZXJ9DQojIHNldCBzZWVkIHRvIGVuc3VyZSByZXN1bHRzIGFyZSByZXByb2R1Y2libGUNCnNldC5zZWVkKDEyMTIpDQoNCiMgZ2V0IG9yZGVycyBmb3IgY29sdW1ucyBhbmQgcm93cw0Kb3JkMSA8LSBnZXRfb3JkZXIoc2VyaWF0ZShwX2VfY29yLCBtZXRob2QgPSAiSEMiKSkNCg0Kb3JkMiA8LSBnZXRfb3JkZXIoc2VyaWF0ZShwX2VfY29yMSwgbWV0aG9kID0gIkhDIikpDQoNCiMgcmVvcmRlcg0KcF9yZW9yZDIgPC0gcF9lX3NjW3JldihvcmQyKSwgb3JkMV0NCmBgYA0KDQoNCmBgYHtyIHF1ZXN0aW9uM19iX2hlYXR9DQpwbG90X2x5KHggPX5jb2xuYW1lcyhwX3Jlb3JkMiksIHkgPX5yb3duYW1lcyhwX3Jlb3JkMiksDQogICAgeiA9IH5wX3Jlb3JkMiwgdHlwZSA9ICJoZWF0bWFwIiwgDQogICAgY29sb3JzID0gY29sb3JSYW1wKGMoImJsYWNrIiwicmVkIikpDQogICkgJT4lDQogIGxheW91dCh0aXRsZSA9ICAiSGVhdG1hcCBvZiBwcmljZXMgYW5kIGVhcm5pbmdzIChDb3IgZGlzdCkiLA0KICAgICAgICAgeGF4aXMgPSBsaXN0KHRpdGxlID0gIlByaWNlLUVhcm5pbmdzIEluZGljYXRvcnMiLCB6ZXJvbGluZSA9IEZBTFNFKSwNCiAgICAgICAgIHlheGlzID0gbGlzdCh0aXRsZSA9ICJDaXRpZXMiLCB6ZXJvbGluZSA9IEZBTFNFKQ0KICApDQpgYGANCg0KVGhlIG9yZGVyaW5nIGJ5IGV1Y2xpZGVhbiBkaXN0YW5jZSBwcm9kdWNlcyBhIGhlYXQgbWFwIHRoYXQgaXMgZWFzaWVyIHRvIGFuYWx5emUuIEF0IGZpcnN0IGdsYW5jZSB3ZSBjYW4gcGVyY2VpdmUgZm91ciBnZW5lcmFsIHJlZ2lvbnMgb2YgdHdvIGdyb3Vwcy4gVGhlIGZpcnN0IGdyb3VwIGhlYXQgbWFwIGNvbG9yIHRlbmRzIHRvd2FyZHMgYSBicmlnaHRlciBzaGFkZSBvZiByZWQgd2hpbGUgdGhlIHNlY29uZCBncm91cCB0ZW5kIHRvd2FyZHMgYSBkYXJrZXIgc2hhZGUgb2YgcmVkL2JsYWNrLiBBbHRob3VnaCB0aGVzZSBncm91cHMgY2FuIGJlIHNlZW4gaW4gdGhlIGNvcnJlbGF0aW9uIGRpc3RhbmNlIGhlYXQgbWFwLCBpdCBpcyBub3QgYXMgY2xlYXIgYXMgdGhlIGZpcnN0Lg0KDQpCYXNlZCBvbiB0aGUgZXVjbGlkZWFuIGRpc3RhbmNlIGhlYXQgbWFwLCBuZXQgd2FnZSB0ZW5kcyB0byBoaWdoZXIgdmFsdWVzIGZyb20gRHViYWkgd2hpbGUgdGhlIG51bWJlciBvZiBob3VycyB3b3JrZWQgZGVjcmVhc2UuIFRoaXMgaXMgdGhlIG9wcG9zaXRlIHRvIGNpdGllcyBsaWtlIERlbGhpLEJhbmtvayBhbmQgU2VvdWwuIEludGVyZXN0aW5nbHkgZm9vZCBjb3N0cyBhcmUgZ2VuZXJhbGx5IGxvdyBpbiB0aGUgY2l0aWVzIHdpdGggaGlnaGVlIHdvcmtpbmcgaG91cnMuIENhcmFjYXMgaXMgYW4gb3V0bGllciBiZWNhdXNlIGZvb2QgY29zdHMgYXJlIGhpZ2ggd2hpbGUgbmV0IHdhZ2UgYW5kIHRoZSBudW1iZXIgb2YgaG91cnMgd29ya2VkIHJlbWFpbnMgbG93Lg0KDQoNCiMjIyBRdWVzdGlvbiA0Lg0KYGBge3IgcXVlc3Rpb240X29yZGVyfQ0KIyB1c2UgcF9lX3JkaXN0IGFuZCBwX2VfY2Rpc3QgKGV1Y2xpZGVhbiBkaXN0YW5jZSkNCm9yZF9xNF8xIDwtIGdldF9vcmRlcihzZXJpYXRlKHBfZV9yZGlzdCwgbWV0aG9kID0gIlRTUCIpKQ0KDQpvcmRlcl9xNF8yIDwtZ2V0X29yZGVyKHNlcmlhdGUocF9lX2NkaXN0LCBtZXRob2QgPSAiVFNQIikpDQoNCnBfcmVvcmRfcTQgPC0gcF9lX3NjW3JldihvcmRfcTRfMSksIG9yZGVyX3E0XzJdDQpgYGANCg0KYGBge3IgcXVlc3Rpb240X2hlYXR9DQpwbG90X2x5KHggPX5jb2xuYW1lcyhwX3Jlb3JkX3E0KSwgeSA9fnJvd25hbWVzKHBfcmVvcmRfcTQpLA0KICAgIHogPSB+cF9yZW9yZF9xNCwgdHlwZSA9ICJoZWF0bWFwIiwgDQogICAgY29sb3JzID0gY29sb3JSYW1wKGMoImJsYWNrIiwicmVkIikpDQogICkgJT4lDQogIGxheW91dCh0aXRsZSA9ICAiSGVhdG1hcCBvZiBwcmljZXMgYW5kIGVhcm5pbmdzIChFdWNsaWQgZGlzdC0gVFNQKSIsDQogICAgICAgICB4YXhpcyA9IGxpc3QodGl0bGUgPSAiUHJpY2UtRWFybmluZ3MgSW5kaWNhdG9ycyIsIHplcm9saW5lID0gRkFMU0UpLA0KICAgICAgICAgeWF4aXMgPSBsaXN0KHRpdGxlID0gIkNpdGllcyIsIHplcm9saW5lID0gRkFMU0UpDQogICkNCmBgYA0KDQpgYGB7ciBxdWVzdGlvbjRfY3JpdH0NCiMgZnVuY3Rpb24gY3JldGVyaW9uIHRvIGNvbXBhcmUgdW5vcmRlcmVkIGRpc3RhbmNlIGFuZCBvcmRlcmVkDQojIGRpc3RhbmNlID0gcF9lX3JkaXN0IChyb3cgZGlzdGFuY2UpDQojIG9yID0gb3JkZXINCm9yMSA8LSBzZXJpYXRlKHBfZV9yZGlzdCwgbWV0aG9kID0gIkhDIikNCg0Kb3IyIDwtIHNlcmlhdGUocF9lX3JkaXN0LCBtZXRob2QgPSAiVFNQIikNCg0KcmVzdWx0MSA8LSByYmluZCh1bm9yZGVyZWQgPSBjcml0ZXJpb24ocF9lX3JkaXN0KSwgb3JkZXJlZCA9IGNyaXRlcmlvbihwX2VfcmRpc3Qsb3IxICkpDQoNCnJlc3VsdDIgPC0gcmJpbmQodW5vcmRlcmVkID0gY3JpdGVyaW9uKHBfZV9yZGlzdCksIG9yZGVyZWQgPSBjcml0ZXJpb24ocF9lX3JkaXN0LG9yMiApKQ0KYGBgDQoNCmBgYHtyIHF1ZXN0aW9uNF9jcml0MX0NCnJlc3VsdDENCmBgYA0KDQpgYGB7ciBxdWVzdGlvbjRfY3JpdDJ9DQpyZXN1bHQyDQpgYGANCg0KSEMgc29sdmVyIGhhcyBzaG9ydGVyIHBhdGggbGVuZ2h0IGNvbXBhcmVkIHRvIFRTUCBzb2x2ZXI=